# -*- coding: utf-8 -*-


"""
实现步骤：
1.发送网络请求
2.获取数据
3.解析数据：提取视频地址及标题
4.发送网络请求：请求每一个视频地址，获取视频二进制数据
5.保存视频
6.通过关键词下载视频/指定一个用户的视频/翻页下载

视频评论区
视频水印
视频弹幕
还有一个就是发布的账号需要养，展示考虑的是通过爬取视频来养号。
"""

import os
import pprint
import time
import requests
import json
import re
# fake_useragent第三方库，实现随机请求头的设置 pip install fake-useragent
from fake_useragent import UserAgent
from moviepy.video.io.VideoFileClip import VideoFileClip
from moviepy.audio.io.AudioFileClip import AudioFileClip


def kuaishou_download():
    """Search Kuaishou for a keyword and download the matching videos.

    Prompts on stdin for a search keyword, queries the Kuaishou GraphQL
    search endpoint for up to five result pages and saves every returned
    video as ``<keyword>/<keyword><n>.mp4``. ``n`` is a counter that keeps
    increasing across pages, so later pages never overwrite earlier files.

    Raises:
        ValueError: If the entered keyword is empty.
        requests.HTTPError: If the search endpoint or a video URL answers
            with an HTTP error status.
    """
    # Keyword arguments as in the original: no external data source,
    # SSL verification disabled for fake_useragent's own lookups.
    ua = UserAgent(use_external_data=False, verify_ssl=False).random
    # Browser-like request headers so the request is not flagged as a script.
    # 'Accept-Encoding' is intentionally NOT set: advertising 'br' makes the
    # server send brotli, which requests can only decode when a brotli package
    # is installed. 'Content-Length' is not set either; requests computes it
    # from the actual body.
    # NOTE(security): the Cookie below embeds a logged-in session
    # (userId / kuaishou.server.web_st); it should be loaded from
    # configuration instead of living in source control.
    headers = {
        'accept': '*/*',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'content-type': 'application/json',
        'Cookie': 'kpf=PC_WEB; kpn=KUAISHOU_VISION; clientid=3; did=web_acb408fff3a5f7cd020782d58bb9caa9; ktrace-context=1|MS43NjQ1ODM2OTgyODY2OTgyLjI4ODYxOTgxLjE2MzczNzIwMzc5NTkuMTQ1NDUxNA==|MS43NjQ1ODM2OTgyODY2OTgyLjI3NzMzOTY1LjE2MzczNzIwMzc5NTkuMTQ1NDUxNQ==|0|graphql-server|webservice|false|NA; client_key=65890b29; userId=1232368006; kuaishou.server.web_st=ChZrdWFpc2hvdS5zZXJ2ZXIud2ViLnN0EqABXhLnnN974NXDx7wxD7EXA0gUwiENGncAU1PMNvGRI8hgQVPES30K2a6e8FZ9L3yv89WVXIZ5I1HsDjjWJlzDijZgHPj64KgQ8dkTm8-Aq5monZejiGHAuenrIuDovugsUnncYRtFHLY_bmEtKpBDoaswti5UnDOkiVHAuhMMPlqdPBKYwV_LZ3SGFMeznHUrJv5Wg4o4C45yi-1iuOPyDRoSsmhEcimAl3NtJGybSc8y6sdlIiCHg_pUdXqAoXPplQJ-iHcM2h_MTI_3Wkdnw9ucUMR5UCgFMAE; kuaishou.server.web_ph=b3651a369fb9eb9f33d30ccc2cc691a5ecbf',
        'Host': 'www.kuaishou.com',
        'Origin': 'https://www.kuaishou.com',
        'Referer': 'https://www.kuaishou.com/search/video?searchKey=%E6%85%A2%E6%91%87',
        'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="96", "Google Chrome";v="96"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        "User-Agent": ua
    }

    # Search keyword, entered interactively.
    keyword = input("请输入你想要查询的关键词：").strip()
    if not keyword:
        raise ValueError("关键词不能为空")
    # The keyword doubles as folder name and file-name prefix, so characters
    # that are illegal in (Windows) paths are replaced first.
    dir_name = re.sub(r'[\\/:*?"<>|]', '_', keyword)
    os.makedirs(dir_name, exist_ok=True)

    url = "https://www.kuaishou.com/graphql"
    # Characters stripped from a caption before it is printed.
    title_junk = re.compile(r'[\/:*?"<>|\s#@，？！_]')
    saved_count = 0  # running index across ALL pages, used in file names

    # Pagination.
    # NOTE(review): the page number is sent as the cursor; the response also
    # carries a `pcursor` field (requested in the query) -- confirm whether
    # that value should be followed instead.
    for page_num in range(1, 6):
        payload = {
            'operationName': "visionSearchPhoto",
            'query': "query visionSearchPhoto($keyword: String, $pcursor: String, $searchSessionId: String, $page: String, $webPageArea: String) {\n  visionSearchPhoto(keyword: $keyword, pcursor: $pcursor, searchSessionId: $searchSessionId, page: $page, webPageArea: $webPageArea) {\n    result\n    llsid\n    webPageArea\n    feeds {\n      type\n      author {\n        id\n        name\n        following\n        headerUrl\n        headerUrls {\n          cdn\n          url\n          __typename\n        }\n        __typename\n      }\n      tags {\n        type\n        name\n        __typename\n      }\n      photo {\n        id\n        duration\n        caption\n        likeCount\n        realLikeCount\n        coverUrl\n        photoUrl\n        liked\n        timestamp\n        expTag\n        coverUrls {\n          cdn\n          url\n          __typename\n        }\n        photoUrls {\n          cdn\n          url\n          __typename\n        }\n        animatedCoverUrl\n        stereoType\n        videoRatio\n        __typename\n      }\n      canAddComment\n      currentPcursor\n      llsid\n      status\n      __typename\n    }\n    searchSessionId\n    pcursor\n    aladdinBanner {\n      imgUrl\n      link\n      __typename\n    }\n    __typename\n  }\n}\n",
            'variables': {'keyword': keyword, 'pcursor': f'{page_num}', 'page': "search"}
        }
        # Pause between search requests.
        time.sleep(2)
        # 1. Send the search request; the body is the JSON-encoded payload,
        #    matching the 'content-type: application/json' header.
        response = requests.post(url=url, headers=headers,
                                 data=json.dumps(payload), timeout=30)
        response.raise_for_status()
        # 2. Decode the response.
        json_data = response.json()
        # 3. Extract the result list; any level may be missing or null when
        #    the search is rejected, which is treated as "no results".
        search_result = (json_data.get('data') or {}).get('visionSearchPhoto') or {}
        feeds_list = search_result.get('feeds') or []
        print(len(feeds_list))
        print(feeds_list)
        for feeds in feeds_list:
            photo = feeds.get('photo') or {}
            photo_url = photo.get('photoUrl')
            if not photo_url:
                # Entry without a downloadable video; nothing to save.
                continue
            new_title = title_junk.sub('', photo.get('caption') or '')
            # 4. Fetch the binary video data; fail loudly rather than saving
            #    an error page under an .mp4 name.
            video_response = requests.get(photo_url, timeout=60)
            video_response.raise_for_status()
            # 5. Save the video.
            target = os.path.join(dir_name, f'{dir_name}{saved_count}.mp4')
            with open(target, mode='wb') as f:
                f.write(video_response.content)
            saved_count += 1
            print(f'{new_title}--下载完成')
        # A short page is treated as the last one (presumably 20 is the
        # page size of the search endpoint).
        if len(feeds_list) < 20:
            break


def _download_to_file(session, url, path, headers=None, timeout=60):
    """Stream ``url`` through ``session`` into the file at ``path``."""
    with session.get(url, headers=headers, stream=True, timeout=timeout) as response:
        # Refuse to write an HTTP error body to disk as if it were media.
        response.raise_for_status()
        with open(path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=1 << 20):
                if chunk:
                    f.write(chunk)


def down_bili_video(bilibili_url="https://www.bilibili.com/video/BV1GT4y1W7Fs",
                    video_path='video.mp4', audio_path='audio.mp3'):
    """Download the first video and audio DASH streams of a Bilibili page.

    The page HTML embeds a ``__playinfo__`` JSON object; the first entries
    of its ``data.dash.video`` and ``data.dash.audio`` lists provide the
    stream URLs. The two streams are saved as separate files and are not
    merged here (the file imports moviepy's ``VideoFileClip`` /
    ``AudioFileClip``, which could be used for that afterwards).

    Args:
        bilibili_url: URL of the Bilibili video page.
        video_path: Destination file for the video stream.
        audio_path: Destination file for the audio stream.

    Raises:
        RuntimeError: If the play info cannot be found in the page or does
            not have the expected structure.
        requests.HTTPError: If the page or a stream answers with an HTTP
            error status.
    """
    user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    page_headers = {
        'User-Agent': user_agent,
        # Kept from the original; presumably set so the page is returned
        # uncompressed -- TODO confirm.
        "accept-encoding": "1"
    }
    # The media requests previously went out with no Referer and the default
    # requests User-Agent; Bilibili's CDN is reported to reject those with 403.
    media_headers = {
        'User-Agent': user_agent,
        'Referer': bilibili_url,
    }

    with requests.Session() as session:
        response = session.get(bilibili_url, headers=page_headers, timeout=30)
        response.raise_for_status()
        html = response.text
        # Extract the embedded play info with a regular expression.
        match_result = re.search(r'__playinfo__=(.*?)</script>', html)
        if not match_result:
            raise RuntimeError('无法解析视频信息')
        play_info_dict = json.loads(match_result.group(1))
        # Pick the first video and audio stream URLs.
        try:
            dash_info = play_info_dict['data']['dash']
            video_url = dash_info['video'][0]['base_url']
            audio_url = dash_info['audio'][0]['base_url']
        except (KeyError, IndexError, TypeError) as exc:
            raise RuntimeError('无法解析视频信息') from exc

        print("B站视频下载地址：", video_url)
        print("B站音频下载地址：", audio_url)
        # Stream both tracks to disk instead of buffering them in memory.
        _download_to_file(session, video_url, video_path, headers=media_headers)
        _download_to_file(session, audio_url, audio_path, headers=media_headers)


# Script entry point: running the file directly starts the Bilibili download.
if __name__ == "__main__":
    down_bili_video()
